op {
  graph_op_name: "ApplyAdam"
  in_arg {
    name: "var"
    description: <<END
Should be from a Variable().
END
  }
  in_arg {
    name: "m"
    description: <<END
Should be from a Variable().
END
  }
  in_arg {
    name: "v"
    description: <<END
Should be from a Variable().
END
  }
  in_arg {
    name: "beta1_power"
    description: <<END
Must be a scalar.
END
  }
  in_arg {
    name: "beta2_power"
    description: <<END
Must be a scalar.
END
  }
  in_arg {
    name: "lr"
    description: <<END
Scaling factor. Must be a scalar.
END
  }
  in_arg {
    name: "beta1"
    description: <<END
Momentum factor. Must be a scalar.
END
  }
  in_arg {
    name: "beta2"
    description: <<END
Momentum factor. Must be a scalar.
END
  }
  in_arg {
    name: "epsilon"
    description: <<END
Ridge term. Must be a scalar.
END
  }
  in_arg {
    name: "grad"
    description: <<END
The gradient.
END
  }
  out_arg {
    name: "out"
    description: <<END
Same as "var".
END
  }
  attr {
    name: "use_locking"
    description: <<END
If `True`, updating of the var, m, and v tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
END
  }
  attr {
    name: "use_nesterov"
    description: <<END
If `True`, uses the nesterov update.
END
  }
  summary: "Update \'*var\' according to the Adam algorithm."
  description: <<END
$$\text{lr}_t := \mathrm{lr} \cdot \frac{\sqrt{1 - \beta_2^t}}{1 - \beta_1^t}$$
$$m_t := \beta_1 \cdot m_{t-1} + (1 - \beta_1) \cdot g$$
$$v_t := \beta_2 \cdot v_{t-1} + (1 - \beta_2) \cdot g^2$$
$$\text{var} := \begin{cases} \text{var} - (m_t \beta_1 + g \cdot (1 - \beta_1))\cdot\text{lr}_t/(\sqrt{v_t} + \epsilon), &\text{if use_nesterov}\\\\  \text{var} - m_t \cdot \text{lr}_t /(\sqrt{v_t} + \epsilon), &\text{otherwise} \end{cases}$$
END
}
